From 6b0982272b177306647d2bf3d42554651351bc06 Mon Sep 17 00:00:00 2001 From: "rn@wyvis.camb.intel-research.net" Date: Tue, 11 Feb 2003 16:44:27 +0000 Subject: [PATCH] bitkeeper revision 1.24 (3e49286b1h6iIeNsarUfoWbDnP6Ndw) New time implementation. Does system and wallclock time for Xen and GuestOSes --- xen-2.4.16/arch/i386/apic.c | 32 +- xen-2.4.16/arch/i386/entry.S | 2 +- xen-2.4.16/arch/i386/time.c | 116 ++++-- xen-2.4.16/common/ac_timer.c | 7 +- xen-2.4.16/common/domain.c | 26 +- xen-2.4.16/common/kernel.c | 1 + xen-2.4.16/common/schedule.c | 13 +- xen-2.4.16/common/timer.c | 4 +- xen-2.4.16/include/asm-i386/time.h | 34 +- .../include/hypervisor-ifs/hypervisor-if.h | 18 +- xen-2.4.16/include/xeno/sched.h | 2 +- xen-2.4.16/include/xeno/time.h | 13 + .../arch/xeno/kernel/time.c | 336 +++++++++++------- 13 files changed, 366 insertions(+), 238 deletions(-) diff --git a/xen-2.4.16/arch/i386/apic.c b/xen-2.4.16/arch/i386/apic.c index d479a69322..c9b096b69f 100644 --- a/xen-2.4.16/arch/i386/apic.c +++ b/xen-2.4.16/arch/i386/apic.c @@ -48,7 +48,6 @@ #include - #undef APIC_TIME_TRACE #ifdef APIC_TIME_TRACE #define TRC(_x) _x @@ -511,7 +510,7 @@ void __init init_apic_mappings(void) static unsigned int bus_freq; static u32 bus_cycle; /* length of one bus cycle in pico-seconds */ static u32 bus_scale; /* scaling factor convert ns to bus cycles */ - +u64 cpu_freq; /* * The timer chip is already set up at HZ interrupts per second here, @@ -643,6 +642,8 @@ int __init calibrate_APIC_clock(void) result/(1000000/HZ), result%(1000000/HZ)); + cpu_freq = (u64)(((t2-t1)/LOOPS)*HZ); + /* set up multipliers for accurate timer code */ bus_freq = result*HZ; bus_cycle = (u32) (1000000000000LL/bus_freq); /* in pico seconds */ @@ -676,13 +677,12 @@ void __init setup_APIC_clocks (void) } #undef APIC_DIVISOR + /* * reprogram the APIC timer. Timeoutvalue is in ns from start of boot * returns 1 on success * returns 0 if the timeout value is too small or in the past. 
*/ - - int reprogram_ac_timer(s_time_t timeout) { int cpu = smp_processor_id(); @@ -695,8 +695,8 @@ int reprogram_ac_timer(s_time_t timeout) if (expire <= 0) { - printk("APICT[%02d] Timeout value in the past %lld > %lld\n", - cpu, now, timeout); + printk("APICT[%02d] Timeout in the past 0x%08X%08X > 0x%08X%08X\n", + cpu, (u32)(now>>32), (u32)now, (u32)(timeout>>32),(u32)timeout); return 0; /* timeout value in the past */ } @@ -728,18 +728,27 @@ int reprogram_ac_timer(s_time_t timeout) * the timer APIC on CPU does not go off every 10ms or so the linux * timers loose accuracy, but that shouldn't be a problem. */ - -static s_time_t last_cpu0_tirq = 0; +//static s_time_t last_cpu0_tirq = 0; inline void smp_local_timer_interrupt(struct pt_regs * regs) { int cpu = smp_processor_id(); - s_time_t diff, now; + //s_time_t diff, now; /* if CPU 0 do old timer stuff */ if (cpu == 0) { - update_time(); + + /* + * XXX RN: the following code should be moved here or somewhere + * else. It's currently done using the 8255 timer interrupt, which + * I'd like to disable. But, APIC initialisation relies on it, + * e.g., timer interrupts coming in, jiffies going up, etc. Need to + * clean this up. Also see ./arch/i386/time.c + */ +#if 0 + //update_time();/* XXX should use a timer for this */ now = NOW(); diff = now - last_cpu0_tirq; + /* this uses three 64bit divisions which should be avoided!! 
*/ if (diff >= MILLISECS(10)) { /* update jiffies */ @@ -749,8 +758,9 @@ inline void smp_local_timer_interrupt(struct pt_regs * regs) do_timer(regs); last_cpu0_tirq = now; } +#endif } - /* call timer function */ + /* call accurate timer function */ do_ac_timer(); } diff --git a/xen-2.4.16/arch/i386/entry.S b/xen-2.4.16/arch/i386/entry.S index 103e9e0888..481e39c80b 100644 --- a/xen-2.4.16/arch/i386/entry.S +++ b/xen-2.4.16/arch/i386/entry.S @@ -520,7 +520,7 @@ ENTRY(hypervisor_call_table) .long SYMBOL_NAME(do_stack_and_ldt_switch) .long SYMBOL_NAME(do_net_update) .long SYMBOL_NAME(do_fpu_taskswitch) - .long SYMBOL_NAME(do_yield) + .long SYMBOL_NAME(do_sched_op) .long SYMBOL_NAME(kill_domain) .long SYMBOL_NAME(do_dom0_op) .long SYMBOL_NAME(do_network_op) diff --git a/xen-2.4.16/arch/i386/time.c b/xen-2.4.16/arch/i386/time.c index 4b4ac515cb..d090fe46bc 100644 --- a/xen-2.4.16/arch/i386/time.c +++ b/xen-2.4.16/arch/i386/time.c @@ -11,7 +11,8 @@ * * Environment: Xen Hypervisor * Description: modified version of Linux' time.c - * implement system and wall clock time. + * implements system and wall clock time. + * based on freebsd's implementation. * **************************************************************************** * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ @@ -53,11 +54,6 @@ unsigned long ticks_per_usec; /* TSC ticks per microsecond. 
*/ spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; - -/* - * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick - */ int timer_ack=0; extern spinlock_t i8259A_lock; static inline void do_timer_interrupt(int irq, @@ -78,6 +74,8 @@ static inline void do_timer_interrupt(int irq, spin_unlock(&i8259A_lock); } #endif + + /* XXX RN: Want to remove this but APIC-SMP code seems to rely on it */ do_timer(regs); } @@ -235,30 +233,59 @@ static unsigned long get_cmos_time(void) } /*************************************************************************** - * System time + * Time + * XXX RN: Will be able to remove some of the locking once the time is + * update by the APIC on only one CPU. ***************************************************************************/ -u32 stime_pcc; /* cycle counter value at last timer irq */ -u32 stime_scale; /* scale factor for converting cc to ns */ -s_time_t stime_now; /* time in ns at last timer IRQ */ -/*************************************************************************** - * Wall Clock time - ***************************************************************************/ -static rwlock_t wctime_lock = RW_LOCK_UNLOCKED; -struct timeval wall_clock_time; /* wall clock time at last update */ -s_time_t wctime_st; /* system time at last update */ +static spinlock_t stime_lock; +static u32 st_scale_f; +static u32 st_scale_i; +u32 stime_pcc; /* cycle counter value at last timer irq */ +s_time_t stime_now; /* time in ns at last timer IRQ */ + +s_time_t get_s_time(void) +{ + unsigned long flags; + u32 delta_tsc, low, pcc; + u64 delta; + s_time_t now; + + spin_lock_irqsave(&stime_lock, flags); + + pcc = stime_pcc; + now = stime_now; + + /* only use bottom 32bits of TSC. 
This should be sufficient */ + rdtscl(low); + delta_tsc = low - pcc; + delta = ((u64)delta_tsc * st_scale_f); + delta >>= 32; + delta += ((u64)delta_tsc * st_scale_i); + + spin_unlock_irqrestore(&stime_lock, flags); + + return now + delta; +} + + +/* Wall Clock time */ +static spinlock_t wctime_lock; +struct timeval wall_clock_time; /* wall clock time at last update */ +s_time_t wctime_st; /* system time at last update */ void do_gettimeofday(struct timeval *tv) { unsigned long flags; unsigned long usec, sec; - read_lock_irqsave(&wctime_lock, flags); + spin_lock_irqsave(&wctime_lock, flags); usec = ((unsigned long)(NOW() - wctime_st))/1000; sec = wall_clock_time.tv_sec; usec += wall_clock_time.tv_usec; - read_unlock_irqrestore(&wctime_lock, flags); + + spin_unlock_irqrestore(&wctime_lock, flags); while (usec >= 1000000) { usec -= 1000000; @@ -276,22 +303,26 @@ void do_settimeofday(struct timeval *tv) /*************************************************************************** * Update times ***************************************************************************/ - /* update hypervisors notion of time */ void update_time(void) { - u32 new_pcc; - s_time_t new_st; - unsigned long usec; + unsigned long flags; + u32 new_pcc; + s_time_t new_st; + unsigned long usec; - /* update system time */ + new_st = NOW(); rdtscl(new_pcc); - stime_now = stime_now+((((s_time_t)stime_scale)* - (new_pcc-stime_pcc))>>10); + + /* update system time */ + spin_lock_irqsave(&stime_lock, flags); + + stime_now = new_st; stime_pcc=new_pcc; + spin_unlock_irqrestore(&stime_lock, flags); + /* update wall clock time */ - write_lock(&wctime_lock); - new_st = NOW(); + spin_lock_irqsave(&wctime_lock, flags); usec = ((unsigned long)(new_st - wctime_st))/1000; usec += wall_clock_time.tv_usec; while (usec >= 1000000) { @@ -300,10 +331,10 @@ void update_time(void) { } wall_clock_time.tv_usec = usec; wctime_st = new_st; - write_unlock(&wctime_lock); + spin_unlock_irqrestore(&wctime_lock, flags); - 
TRC(printk("TIME[%02d] update time: stime_now=%lld now=%lld, wct=%ld:%ld\n" - cpu, stime_now, new_st, wall_clock_time.tv_sec, + TRC(printk("TIME[%02d] update time: stime_now=%lld now=%lld,wct=%ld:%ld\n", + smp_processor_id(), stime_now, new_st, wall_clock_time.tv_sec, wall_clock_time.tv_usec)); } @@ -311,13 +342,20 @@ void update_time(void) { void update_dom_time(shared_info_t *si) { unsigned long flags; - read_lock_irqsave(&wctime_lock, flags); + + spin_lock_irqsave(&stime_lock, flags); si->system_time = stime_now; si->st_timestamp = stime_pcc; + spin_unlock_irqrestore(&stime_lock, flags); + + spin_lock_irqsave(&wctime_lock, flags); si->tv_sec = wall_clock_time.tv_sec; si->tv_usec = wall_clock_time.tv_usec; si->wc_timestamp = wctime_st; - read_unlock_irqrestore(&wctime_lock, flags); + si->wc_version++; + spin_unlock_irqrestore(&wctime_lock, flags); + + TRC(printk(" 0x%08X%08X\n", (u32)(wctime_st>>32), (u32)wctime_st)); } /*************************************************************************** @@ -328,17 +366,26 @@ int __init init_xeno_time() { int cpu = smp_processor_id(); u32 cpu_cycle; /* time of one cpu cyle in pico-seconds */ + u64 scale; + + spin_lock_init(&stime_lock); + spin_lock_init(&wctime_lock); /* System Time */ cpu_cycle = (u32) (1000000000LL/cpu_khz); /* in pico seconds */ - stime_scale = (cpu_cycle * 1024) / 1000; + scale = 1000000000LL << 32; + scale /= cpu_freq; + st_scale_f = scale & 0xffffffff; + st_scale_i = scale >> 32; stime_now = (s_time_t)0; rdtscl(stime_pcc); printk("Init Time[%02d]:\n", cpu); printk(".... System Time: %lldns\n", NOW()); - printk(".... stime_scale: %u\n", stime_scale); + printk(".....cpu_cycle: %u ps\n", cpu_cycle); + printk(".... st_scale_f: %X\n", st_scale_f); + printk(".... st_scale_i: %X\n", st_scale_i); printk(".... 
stime_pcc: %u\n", stime_pcc); /* Wall Clock time */ @@ -367,7 +414,6 @@ void __init time_init(void) ticks_per_usec = ticks_per_frac / (1000000/CALIBRATE_FRAC); cpu_khz = ticks_per_frac / (1000/CALIBRATE_FRAC); - printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); diff --git a/xen-2.4.16/common/ac_timer.c b/xen-2.4.16/common/ac_timer.c index dec3f34646..45daa4c35d 100644 --- a/xen-2.4.16/common/ac_timer.c +++ b/xen-2.4.16/common/ac_timer.c @@ -91,7 +91,9 @@ int add_ac_timer(struct ac_timer *timer) TRC(printk("ACT [%02d] add(): now=%lld timo=%lld\n", cpu, now, timer->expires)); if (timer->expires <= now) { - printk("ACT[%02d] add_ac_timer(): timeout value in the past\n", cpu); + printk("ACT[%02d] add_ac_timer: now=0x%08X%08X > expire=0x%08X%08X\n", + cpu, (u32)(now>>32), (u32)now, + (u32)(timer->expires>>32), (u32)timer->expires); return 1; } @@ -100,7 +102,8 @@ int add_ac_timer(struct ac_timer *timer) /* check if timer would be inserted at start of list */ if ((list_empty(&ac_timers[cpu].timers)) || (timer->expires < - (list_entry(&ac_timers[cpu].timers, struct ac_timer, timer_list))->expires)) { + (list_entry(&ac_timers[cpu].timers, + struct ac_timer, timer_list))->expires)) { TRC(printk("ACT [%02d] add(): add at head\n", cpu)); /* Reprogramm and add to head of list */ diff --git a/xen-2.4.16/common/domain.c b/xen-2.4.16/common/domain.c index 19a2f8119b..0392d2b3a3 100644 --- a/xen-2.4.16/common/domain.c +++ b/xen-2.4.16/common/domain.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -160,8 +161,7 @@ void release_task(struct task_struct *p) free_task_struct(p); } - -static unsigned int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes) +unsigned int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes) { struct list_head *temp; struct pfn_info *pf, *pf_head; @@ -233,7 +233,6 @@ int final_setup_guestos(struct task_struct * p, dom_meminfo_t * meminfo) l1_pgentry_t * l1tab; start_info_t * 
virt_startinfo_addr; unsigned long virt_stack_addr; - unsigned long long time; unsigned long phys_l2tab; page_update_request_t * pgt_updates; unsigned long curr_update_phys; @@ -285,10 +284,9 @@ int final_setup_guestos(struct task_struct * p, dom_meminfo_t * meminfo) unmap_domain_mem((void *)((unsigned long)l1tab & PAGE_MASK)); /* set up the shared info structure */ - rdtscll(time); - p->shared_info->wall_time = time; - p->shared_info->domain_time = time; - p->shared_info->ticks_per_ms = ticks_per_usec * 1000; + update_dom_time(p->shared_info); + p->shared_info->cpu_freq = cpu_freq; + p->shared_info->domain_time = 0; /* we pass start info struct to guest os as function parameter on stack */ virt_startinfo_addr = (start_info_t *)meminfo->virt_startinfo_addr; @@ -389,7 +387,6 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params) unsigned long virt_load_address, virt_stack_address, virt_shinfo_address; unsigned long virt_ftable_start, virt_ftable_end, ft_mapping; start_info_t *virt_startinfo_address; - unsigned long long time; unsigned long count; unsigned long alloc_index; unsigned long ft_pages; @@ -521,10 +518,10 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params) unmap_domain_mem(l1start); /* Set up shared info area. 
*/ - rdtscll(time); - p->shared_info->wall_time = time; - p->shared_info->domain_time = time; - p->shared_info->ticks_per_ms = ticks_per_usec * 1000; + update_dom_time(p->shared_info); + p->shared_info->cpu_freq = cpu_freq; + p->shared_info->domain_time = 0; + virt_startinfo_address = (start_info_t *) (virt_load_address + ((alloc_index - 1) << PAGE_SHIFT)); @@ -640,11 +637,6 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params) void __init domain_init(void) { printk("Initialising domains\n"); -// scheduler_init(); } - -#if 0 - } -} diff --git a/xen-2.4.16/common/kernel.c b/xen-2.4.16/common/kernel.c index fe67b0f14b..8ee79b8149 100644 --- a/xen-2.4.16/common/kernel.c +++ b/xen-2.4.16/common/kernel.c @@ -182,6 +182,7 @@ void cmain (unsigned long magic, multiboot_info_t *mbi) { panic("Could not set up DOM0 guest OS\n"); } + update_dom_time(new_dom->shared_info); wake_up(new_dom); cpu_idle(); diff --git a/xen-2.4.16/common/schedule.c b/xen-2.4.16/common/schedule.c index a835151cc5..49ccc194b6 100644 --- a/xen-2.4.16/common/schedule.c +++ b/xen-2.4.16/common/schedule.c @@ -275,8 +275,7 @@ asmlinkage void schedule(void) if ( prev->state == TASK_DYING ) release_task(prev); same_process: - - update_dom_time(next->shared_info); + update_dom_time(current->shared_info); if ( test_bit(_HYP_EVENT_NEED_RESCHED, ¤t->hyp_events) ) goto need_resched_back; @@ -294,20 +293,22 @@ static void sched_timer(unsigned long foo) if (count[cpu] >= 5) { set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events); count[cpu] = 0; + if (cpu == 0) + update_time(); /* XXX RN: Should be moved on its own timer */ } count[cpu]++; again: now = NOW(); s_timer[cpu].expires = now + MILLISECS(10); + res=add_ac_timer(&s_timer[cpu]); TRC(printk("SCHED[%02d] timer(): now=0x%08X%08X timo=0x%08X%08X\n", cpu, (u32)(now>>32), (u32)now, (u32)(s_timer[cpu].expires>>32), (u32)s_timer[cpu].expires)); - res=add_ac_timer(&s_timer[cpu]); - if (res==1) { + if (res==1) goto again; - } + } /* * Initialise the 
data structures @@ -340,6 +341,6 @@ void schedulers_start(void) { printk("Start schedulers\n"); __cli(); sched_timer(0); - smp_call_function(sched_timer, (void*)0, 1, 1); + smp_call_function(sched_timer, NULL, 1, 1); __sti(); } diff --git a/xen-2.4.16/common/timer.c b/xen-2.4.16/common/timer.c index 77e511de34..da0452249e 100644 --- a/xen-2.4.16/common/timer.c +++ b/xen-2.4.16/common/timer.c @@ -588,7 +588,6 @@ void do_timer(struct pt_regs *regs) { struct task_struct *p; shared_info_t *s; - unsigned long long wall; unsigned long cpu_mask = 0; (*(unsigned long *)&jiffies)++; @@ -596,8 +595,7 @@ void do_timer(struct pt_regs *regs) if ( !using_apic_timer ) update_process_times(user_mode(regs)); - rdtscll(wall); - + /* XXX RN: Move this for virtual domain time timer interrupts */ read_lock(&tasklist_lock); p = &idle0_task; do { diff --git a/xen-2.4.16/include/asm-i386/time.h b/xen-2.4.16/include/asm-i386/time.h index 9e2f77727d..9825847be1 100644 --- a/xen-2.4.16/include/asm-i386/time.h +++ b/xen-2.4.16/include/asm-i386/time.h @@ -37,37 +37,9 @@ static inline cc_time_t get_cc_time() /* * System Time */ -typedef s64 s_time_t; /* System time */ -extern u32 stime_pcc; /* cycle counter value at last timer irq */ -extern s_time_t stime_now; /* time in ns at last timer IRQ */ -extern u32 stime_scale; /* scale factur for converting cc to ns */ - - -/* - * This is the Nemesis implementation. - * The variables are all set in apic.c - * Every timer IRQ time_now and time_pcc is set to the current values - * At callibration time_scale is set - */ -static s_time_t get_s_time(void) -{ - u32 delta, low, pcc; - s_time_t now; - s_time_t incr; - - /* read two values (pcc, now) "atomically" */ -again: - pcc = stime_pcc; - now = stime_now; - if (stime_pcc != pcc) goto again; - - /* only use bottom 32bits of TSC. 
This should be sufficient */ - rdtscl(low); - delta = low - pcc; - - incr = ((s_time_t)(stime_scale) * delta) >> 10; - return now + incr; -} +typedef s64 s_time_t; /* System time */ +extern u32 stime_pcc; /* cycle counter value at last timer irq */ +extern s_time_t stime_now; /* time in ns at last timer IRQ */ /* update time variables once in a while */ extern void update_time(void); diff --git a/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h b/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h index 5f86f4fd58..74d0bea508 100644 --- a/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h +++ b/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h @@ -72,7 +72,7 @@ typedef struct #define __HYPERVISOR_stack_and_ldt_switch 4 #define __HYPERVISOR_net_update 5 #define __HYPERVISOR_fpu_taskswitch 6 -#define __HYPERVISOR_yield 7 +#define __HYPERVISOR_sched_op 7 #define __HYPERVISOR_exit 8 #define __HYPERVISOR_dom0_op 9 #define __HYPERVISOR_network_op 10 @@ -160,16 +160,20 @@ typedef struct shared_info_st { * The following abstractions are exposed: System Time, Wall Clock * Time, Domain Virtual Time. Domains can access Cycle counter time * directly. + * XXX RN: Need something to pass NTP scaling to GuestOS. 
*/ + u64 cpu_freq; /* to calculate ticks -> real time */ + /* System Time */ - long long system_time; /* in ns */ - unsigned long st_timestamp; /* cyclecounter at last update */ - unsigned long ticks_per_ms; /* CPU ticks per millisecond */ + long long system_time; /* in ns */ + unsigned long st_timestamp; /* cyclecounter at last update */ + /* Wall Clock Time */ - long tv_sec; /* essentially a struct timeval */ - long tv_usec; - long long wc_timestamp; /* system time at last update */ + u32 wc_version; /* a version number for info below */ + long tv_sec; /* essentially a struct timeval */ + long tv_usec; + long long wc_timestamp; /* system time at last update */ /* Domain Virtual Time */ unsigned long long domain_time; diff --git a/xen-2.4.16/include/xeno/sched.h b/xen-2.4.16/include/xeno/sched.h index d0fdab4948..9f5917c969 100644 --- a/xen-2.4.16/include/xeno/sched.h +++ b/xen-2.4.16/include/xeno/sched.h @@ -193,7 +193,7 @@ extern unsigned long wait_init_idle; #define set_current_state(_s) do { current->state = (_s); } while (0) #define MAX_SCHEDULE_TIMEOUT LONG_MAX void scheduler_init(void); -void start_scheduler(void); +void schedulers_start(void); void sched_add_domain(struct task_struct *p); void sched_rem_domain(struct task_struct *p); int wake_up(struct task_struct *p); diff --git a/xen-2.4.16/include/xeno/time.h b/xen-2.4.16/include/xeno/time.h index 5bb717fb2d..a017b0d2b0 100644 --- a/xen-2.4.16/include/xeno/time.h +++ b/xen-2.4.16/include/xeno/time.h @@ -31,11 +31,18 @@ #include /* pull in architecture specific time definition */ #include +/* + * Init time + */ +extern int init_xeno_time(); + /* * Cycle Counter Time (defined in asm/time.h) */ +extern u64 cpu_freq; + /* * System Time * 64 bit value containing the nanoseconds elapsed since boot time. 
@@ -44,6 +51,9 @@ * The other macros are for convenience to approximate short intervals * of real time into system time */ + +s_time_t get_s_time(void); + #define NOW() ((s_time_t)get_s_time()) #define SECONDS(_s) (((s_time_t)(_s)) * 1000000000UL ) #define TENTHS(_ts) (((s_time_t)(_ts)) * 100000000UL ) @@ -79,6 +89,9 @@ extern void (*do_get_fast_time)(struct timeval *); /* XXX Interface for getting and setting still missing */ +/* update the per domain time information */ +extern void update_dom_time(shared_info_t *si); + /* XXX move this */ extern void do_timer(struct pt_regs *regs); diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c index 2557918c6d..9f53010b73 100644 --- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c @@ -1,3 +1,25 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: arch.xeno/time.c + * Author: Rolf Neugebauer + * Changes: + * + * Date: Nov 2002 + * + * Environment: XenoLinux + * Description: Interface with Hypervisor to get correct notion of time + * Currently supports Systemtime and WallClock time. + * + * (This has hardly any resemblance with the Linux code but left the + * copyright notice anyway. Ignore the comments in the copyright notice.) + **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + */ + /* * linux/arch/i386/kernel/time.c * @@ -30,19 +52,6 @@ * serialize accesses to xtime/lost_ticks). 
*/ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include #include #include #include @@ -51,94 +60,110 @@ #include #include -#include -#include -#include - +#include #include +#include +#include +#include +#include +#include #include - -unsigned long cpu_khz; /* Detected as we calibrate the TSC */ - -/* Cached *multiplier* to convert TSC counts to microseconds. - * (see the equation below). - * Equal to 2^32 * (1 / (clocks per usec) ). - * Initialized in time_init. - */ -unsigned long fast_gettimeoffset_quotient; - -extern rwlock_t xtime_lock; -extern unsigned long wall_jiffies; +#undef XENO_TIME_DEBUG /* adds sanity checks and periodic printouts */ spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; +extern rwlock_t xtime_lock; -static inline unsigned long ticks_to_secs(unsigned long long ticks) -{ - unsigned long lo, hi; - unsigned long little_ticks; - - little_ticks = ticks /* XXX URK! XXX / 1000000ULL */; - - __asm__ __volatile__ ( - "mull %2" - : "=a" (lo), "=d" (hi) - : "rm" (fast_gettimeoffset_quotient), "0" (little_ticks) ); +unsigned long cpu_khz; /* get this from Xen, used elsewhere */ +static spinlock_t hyp_stime_lock = SPIN_LOCK_UNLOCKED; +static spinlock_t hyp_wctime_lock = SPIN_LOCK_UNLOCKED; - return(hi); -} +static u32 st_scale_f; +static u32 st_scale_i; +static u32 shadow_st_pcc; +static s64 shadow_st; -/* NB. Only 32 bits of ticks are considered here. */ -static inline unsigned long ticks_to_us(unsigned long ticks) +/* + * System time. + * Although the rest of the Linux kernel doesn't know about this, we + * we use it to extrapolate passage of wallclock time. + * We need to read the values from the shared info page "atomically" + * and use the cycle counter value as the "version" number. Clashes + * should be very rare. 
+ */ +static inline long long get_s_time(void) { - unsigned long lo, hi; + unsigned long flags; + u32 delta_tsc, low, pcc; + u64 delta; + s64 now; - __asm__ __volatile__ ( - "mull %2" - : "=a" (lo), "=d" (hi) - : "rm" (fast_gettimeoffset_quotient), "0" (ticks) ); - - return(hi); -} - -static long long get_s_time(void) -{ - u32 delta, low, pcc; - long long now; - long long incr; + spin_lock_irqsave(&hyp_stime_lock, flags); - /* read two values (pcc, now) "atomically" */ -again: pcc = HYPERVISOR_shared_info->st_timestamp; - now = HYPERVISOR_shared_info->system_time; - if (HYPERVISOR_shared_info->st_timestamp != pcc) goto again; - + mb(); + if (pcc != shadow_st_pcc) { +st_again: + shadow_st_pcc = HYPERVISOR_shared_info->st_timestamp; + shadow_st = HYPERVISOR_shared_info->system_time; + pcc = HYPERVISOR_shared_info->st_timestamp; + mb(); + if (pcc != shadow_st_pcc) + goto st_again; + } + + now = shadow_st; /* only use bottom 32bits of TSC. This should be sufficient */ rdtscl(low); - delta = low - pcc; + delta_tsc = low - pcc; + delta = ((u64)delta_tsc * st_scale_f); + delta >>= 32; + delta += ((u64)delta_tsc * st_scale_i); + spin_unlock_irqrestore(&hyp_stime_lock, flags); + return now + delta; - incr = ((long long)(ticks_to_us(delta)*1000)); - return now + incr; } #define NOW() ((long long)get_s_time()) /* - * This version of gettimeofday has microsecond resolution - * and better than microsecond precision on fast x86 machines with TSC. + * Wallclock time. + * Based on what the hypervisor tells us, extrapolated using system time. + * Again need to read a number of values from the shared page "atomically". + * this time using a version number. 
*/ +static u32 shadow_wc_version=0; +static long shadow_tv_sec; +static long shadow_tv_usec; +static long long shadow_wc_timestamp; void do_gettimeofday(struct timeval *tv) { - unsigned long flags; - unsigned long usec, sec; - - read_lock_irqsave(&xtime_lock, flags); - - usec = ((unsigned long)(NOW()-HYPERVISOR_shared_info->wc_timestamp))/1000; - sec = HYPERVISOR_shared_info->tv_sec; - usec += HYPERVISOR_shared_info->tv_usec; - - read_unlock_irqrestore(&xtime_lock, flags); + unsigned long flags; + long usec, sec; + u32 version; + u64 now; + + spin_lock_irqsave(&hyp_wctime_lock, flags); + + version = HYPERVISOR_shared_info->wc_version; + mb(); + if (version != shadow_wc_version) { + wc_again: + shadow_wc_version = HYPERVISOR_shared_info->wc_version; + shadow_tv_sec = HYPERVISOR_shared_info->tv_sec; + shadow_tv_usec = HYPERVISOR_shared_info->tv_usec; + shadow_wc_timestamp = HYPERVISOR_shared_info->wc_timestamp; + mb(); /* finish reading the payload before re-reading the version */ + version = HYPERVISOR_shared_info->wc_version; + mb(); + if (version != shadow_wc_version) + goto wc_again; + } + + now = NOW(); + usec = ((unsigned long)(now-shadow_wc_timestamp))/1000; + sec = shadow_tv_sec; + usec += shadow_tv_usec; while ( usec >= 1000000 ) { @@ -148,11 +173,39 @@ void do_gettimeofday(struct timeval *tv) tv->tv_sec = sec; tv->tv_usec = usec; + + spin_unlock_irqrestore(&hyp_wctime_lock, flags); + +#ifdef XENO_TIME_DEBUG + { + static long long old_now=0; + static long long wct=0, old_wct=0; + + /* This debug code checks if time increase over two subsequent calls */ + wct=(((long long)sec) * 1000000) + usec; + /* wall clock time going backwards */ + if ((wct < old_wct) ) { + printk("Urgh1: wc diff=%6ld, usec = %ld (0x%lX)\n", + (long)(wct-old_wct), usec, usec); + printk(" st diff=%lld cur st=0x%016llX old st=0x%016llX\n", + now-old_now, now, old_now); + } + + /* system time going backwards */ + if (now<=old_now) { + printk("Urgh2: st diff=%lld cur st=0x%016llX old st=0x%016llX\n", + 
now-old_now, now, old_now); + } + old_wct = wct; + old_now = now; + } +#endif + } void do_settimeofday(struct timeval *tv) { -/* XXX RN: shoudl do something special here for dom0 */ +/* XXX RN: should do something special here for dom0 */ #if 0 write_lock_irq(&xtime_lock); /* @@ -181,25 +234,68 @@ void do_settimeofday(struct timeval *tv) /* - * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick + * Timer ISR. + * Unlike normal Linux these don't come in at a fixed rate of HZ. + * In here we work out how often it should have been called and then call + * the architecture independent part (do_timer()) the appropriate number of + * times. A bit of a nasty hack, to keep the "other" notion of wallclock time + * happy. */ -static inline void do_timer_interrupt( - int irq, void *dev_id, struct pt_regs *regs) +static long long us_per_tick=1000000/HZ; +static long long last_irq; +static inline void do_timer_interrupt(int irq, void *dev_id, + struct pt_regs *regs) { - do_timer(regs); + struct timeval tv; + long long time, delta; + +#ifdef XENO_TIME_DEBUG + static u32 foo_count = 0; + foo_count++; + if (foo_count>= 10000) { + s64 n = NOW(); + struct timeval tv; + do_gettimeofday(&tv); + printk("0x%08X%08X %ld:%ld\n", + (u32)(n>>32), (u32)n, tv.tv_sec, tv.tv_usec); + foo_count = 0; + } +#endif + + /* + * The next bit really sucks: + * Linux not only uses do_gettimeofday() to keep a notion of + * wallclock time, but also maintains the xtime struct and jiffies. + * (Even worse some userland code accesses this via the sys_time() + * system call) + * Unfortunately, xtime is maintained in the architecture independent + * part of the timer ISR (./kernel/timer.c sic!). 
So, although we have + * perfectly valid notion of wallclock time from the hypervisor we here + * fake missed timer interrupts so that the arch independent part of + * the Timer ISR updates jiffies for us *and* once the bh gets run + * updates xtime accordingly. Yuck! + */ + + /* work out the number of jiffies past and update them */ + do_gettimeofday(&tv); + time = (((long long)tv.tv_sec) * 1000000) + tv.tv_usec; + delta = time - last_irq; + if (delta <= 0) { + printk ("Timer ISR: Time went backwards: %lld\n", delta); + return; + } + while (delta >= us_per_tick) { + do_timer(regs); + delta -= us_per_tick; + last_irq += us_per_tick; + } + #if 0 if (!user_mode(regs)) x86_do_profile(regs->eip); #endif } - -/* - * This is the same as the above, except we _also_ save the current - * Time Stamp Counter value at the time of the timer interrupt, so that - * we later on can estimate the time of day more exactly. - */ static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { write_lock(&xtime_lock); @@ -216,42 +312,32 @@ static struct irqaction irq_timer = { NULL }; - - -/* Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). */ -static unsigned long __init calibrate_tsc(void) -{ - unsigned long quo, rem; - - /* quotient == (1000 * 2^32) / ticks_per ms */ - __asm__ __volatile__ ( - "divl %2" - : "=a" (quo), "=d" (rem) - : "r" (HYPERVISOR_shared_info->ticks_per_ms), "0" (0), "1" (1000) ); - - return(quo); -} - void __init time_init(void) { unsigned long long alarm; - - fast_gettimeoffset_quotient = calibrate_tsc(); + u64 cpu_freq = HYPERVISOR_shared_info->cpu_freq; + u64 scale; + + do_get_fast_time = do_gettimeofday; + + cpu_khz = (u32)cpu_freq/1000; + printk("Xen reported: %lu.%03lu MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); + + /* + * calculate systemtime scaling factor + * XXX RN: have to cast cpu_freq to u32 limits it to 4.29 GHz. + * Get a better do_div! 
+ */ + scale = 1000000000LL << 32; + do_div(scale,(u32)cpu_freq); + st_scale_f = scale & 0xffffffff; + st_scale_i = scale >> 32; + printk("System Time scale: %X %X\n",st_scale_i, st_scale_f); - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. - */ - { - unsigned long eax=0, edx=1000; - __asm__ __volatile__ - ("divl %2" - :"=a" (cpu_khz), "=d" (edx) - :"r" (fast_gettimeoffset_quotient), - "0" (eax), "1" (edx)); - printk("Detected %lu.%03lu MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - } + do_gettimeofday(&xtime); + last_irq = (((long long)xtime.tv_sec) * 1000000) + xtime.tv_usec; + printk("last: %lld\n", last_irq); setup_irq(TIMER_IRQ, &irq_timer); @@ -260,12 +346,14 @@ void __init time_init(void) * 'domain' time. This means that clock sshould run at the correct * rate. For things like scheduling, it's not clear whether it * matters which sort of time we use. + * XXX RN: unimplemented. */ + rdtscll(alarm); +#if 0 alarm += (1000/HZ)*HYPERVISOR_shared_info->ticks_per_ms; HYPERVISOR_shared_info->wall_timeout = alarm; HYPERVISOR_shared_info->domain_timeout = ~0ULL; +#endif clear_bit(_EVENT_TIMER, &HYPERVISOR_shared_info->events); - - do_gettimeofday(&xtime); } -- 2.30.2